Package installation

if(params$outcome=="cases") {
#This is for the cases:
url2 <- "https://forecast-eval.s3.us-east-2.amazonaws.com/score_cards_nation_cases.rds"
download.file(url2, "eval_cases.RDS") # download to disk
scores <- readRDS(paste0(here(), "/eval_cases.RDS"))
scores <- subset(scores,  forecaster %in% params$forecasters)
}

if(params$outcome=="deaths"){
url1 <- "https://forecast-eval.s3.us-east-2.amazonaws.com/score_cards_state_deaths.rds"
download.file(url1, "eval_deaths.RDS") # download to disk
scores <- readRDS(paste0(here(), "/eval_deaths.RDS"))
scores <- subset(scores,  forecaster %in% params$forecasters)
}

if(params$outcome=="hospitalizations"){
url1 <- "https://forecast-eval.s3.us-east-2.amazonaws.com/score_cards_state_hospitalizations.rds"
download.file(url1, "eval_hospitalizations.RDS") # download to disk
scores <- readRDS(paste0(here(), "/eval_hospitalizations.RDS"))
scores <- subset(scores,  forecaster %in% params$forecasters)
}

primary_forecaster <- params$primary_forecaster
our_pred_dates <- 
  scores %>%
  filter(forecaster == "CMU-TimeSeries") 

our_pred_dates <- unique(our_pred_dates$forecast_date) 
n_dates <- length(our_pred_dates)

# n_dates - 4 is often the most recent date with ground truth for 4 weeks ahead
# dates spaced out 2 weeks ahead to see different behaviors
forecast_dates <- our_pred_dates[n_dates- 2 *5:2]
scores$forecast_date <- 
  if_else(scores$forecaster %in% c("Karlen-pypm", "CU-select"), scores$forecast_date + 1, scores$forecast_date)

scores <- subset(scores, forecast_date %in% forecast_dates)
results <- intersect_averagers(scores, c("forecaster"), c("forecast_date", "geo_value")) %>%
  select(c("ahead", "geo_value", "forecaster","forecast_date", "data_source", "signal","target_end_date","incidence_period","actual","wis","ae","cov_80"))

results %>%
  group_by(forecast_date) %>%
  summarise(n_distinct(geo_value))
## # A tibble: 3 x 2
##   forecast_date `n_distinct(geo_value)`
##   <date>                          <int>
## 1 2021-05-24                         48
## 2 2021-06-07                         48
## 3 2021-06-21                         48

Downloading the data

Retrieving prediction data of the selected forecasters

results %>%
  download_this(
    output_name = "results",
    output_extension = ".csv",
    button_label = "Download Predictions Evaluation",
    button_type = "success",
    has_icon = TRUE,
    csv2 = FALSE,
    icon = "fa fa-save"
  )

Overall AE, WIS, Coverage 80

NOTE: Results are based on the following numbers of common locations

weeks.label = c("1 week ahead", "2 weeks ahead", "3 weeks ahead", "4 weeks ahead")
names(weeks.label) = c(1, 2, 3, 4)

subtitle = sprintf("Forecasts made over %s to %s",
                   format(min(forecast_dates), "%B %d, %Y"),
                   format(max(forecast_dates), "%B %d, %Y"))

plot_ae <-
  plot_canonical(results, 
                 x = "ahead", 
                 y = "ae", 
                 aggr = mean) +
  labs(title = subtitle, x= "Weeks Ahead", y = "Mean AE",color='Forecasters') +  
  geom_point(aes(text=sprintf("Weeks Ahead: %s<br>Average Error: %s <br>Forecaster: %s", 
                              ahead, 
                              round(ae, digits=2),
                              color)),
             alpha = 0.05) +
  theme_minimal() +
  theme(plot.title = element_text(hjust = 0.5)) +
  scale_y_log10()

if (params$colorblind_palette) {
  plot_ae <- plot_ae + 
    scale_color_viridis_d()
}

ggplotly(plot_ae, tooltip="text", width=1000) %>% 
  layout(hoverlabel = list(bgcolor = "white"))
plot_wis <-
  plot_canonical(results, 
                 x = "forecast_date", 
                 y = "wis", 
                 aggr = mean,
                 grp_vars = c("forecaster","ahead"), 
                 facet_rows = "ahead") + 
  labs(title = subtitle, 
       x = "Forecast Dates", 
       y = "Mean WIS",
       color = "Forecasters") +
  geom_point(aes(text=sprintf("Forecast Date: %s<br>Mean WIS: %s <br>Forecaster: %s", 
                              forecast_date, 
                              round(wis, digits = 2),
                              color)),
             alpha = 0.05) +
  facet_wrap(~ahead, nrow = 4, labeller = labeller(ahead=weeks.label)) +
  theme(strip.background = element_rect(fill = "white")) +
  theme_minimal() +
  theme(plot.title = element_text(hjust = "center")) + 
  scale_y_log10()

if (params$colorblind_palette) {
  plot_wis <- plot_wis + 
    scale_color_viridis_d()
}

ggplotly(plot_wis, tooltip="text", height=800, width= 1000) %>% 
  layout(hoverlabel = list(bgcolor = "white"))
plot_cov80 <-
  plot_canonical(results, 
                 x = "forecast_date", 
                 y = "cov_80", 
                 aggr = mean,
                 grp_vars = c("forecaster","ahead"), 
                 facet_rows = "ahead") +
  labs(title = subtitle, x= "Forecast date", y = "Mean Coverage 80", color='Forecasters') +
  geom_point(aes(text = sprintf("Forecast Date: %s<br>Coverage: %s <br>Forecaster: %s", 
                                forecast_date, 
                                round(cov_80, digits = 2),
                                color)),
             alpha = 0.05) +
  facet_wrap(~ahead, nrow = 4, labeller = labeller(ahead = weeks.label)) +
  theme(strip.background = element_rect(fill = "white")) +
  theme_minimal() +
  theme(plot.title = element_text(hjust = "center")) +
  geom_line(mapping = aes(y = .8), )

if (params$colorblind_palette) {
  plot_cov80 <- plot_cov80 + 
    scale_color_viridis_d()
}

ggplotly(plot_cov80, tooltip="text", height=800, width=1000) %>% 
  layout(hoverlabel = list(bgcolor = "white"))

(Geometric) Mean relative WIS

Relative to baseline; scale first then take the geometric mean, ignoring a few 0’s.

geom_mean <- function(x) prod(x)^(1/length(x))
#geom_mean <- exp(mean(log((x+1)/(y+1)))) #still need to figure this out

mean_wis <- 
  plot_canonical(results %>% 
                   filter(wis > 0), 
                 x = "ahead", 
                 y = "wis", 
                 aggr = geom_mean,
                 base_forecaster = "COVIDhub-baseline", 
                 scale_before_aggr = TRUE) + 
  labs(title = subtitle, 
       x = "Weeks Ahead", 
       y = "Mean (geometric) relative WIS", 
       color = "Forecasters") +
  geom_point(aes(text = sprintf("Weeks Ahead: %s<br>WIS: %s <br>Forecaster: %s", 
                                ahead, 
                                round(wis, digits = 2),
                                color)),
             alpha = 0.05) +
  theme_minimal() +
  theme(plot.title = element_text(hjust = "center")) +
  geom_line(mapping = aes(y = 1))

if (params$colorblind_palette) {
  mean_wis <- mean_wis + 
    scale_color_viridis_d()
}

ggplotly(mean_wis, tooltip="text", width= 1000) %>% 
  layout(hoverlabel = list(bgcolor = "white"))
mean_wis_forecast_date <- 
plot_canonical(results %>% 
                 filter(wis > 0), 
               x = "forecast_date", 
               y = "wis", 
               aggr = geom_mean, 
               facet_rows = "ahead",
               grp_vars = c("forecaster", "ahead"),
               base_forecaster = "COVIDhub-baseline", 
               scale_before_aggr = TRUE) +
  theme(legend.position = "bottom") + 
  labs(title = subtitle, 
       x = "Forecast date", 
       y = "Mean (geometric) relative WIS", 
       color = "Forecasters") +
  geom_point(aes(text = sprintf("Forecast Date: %s<br>WIS: %s <br>Forecaster: %s", 
                                forecast_date, 
                                round(wis, digits = 2),
                                color)),
             alpha = 0.05) +
  facet_wrap(~ahead, nrow = 4, labeller = labeller(ahead = weeks.label)) +
  theme(strip.background = element_rect(fill = "white")) +
  theme_minimal() +
  theme(plot.title = element_text(hjust = "center")) +
  geom_line(mapping = aes(y = 1))

if (params$colorblind_palette) {
  mean_wis_forecast_date <- mean_wis_forecast_date + 
    scale_color_viridis_d()
}

ggplotly(mean_wis_forecast_date, tooltip = "text", height=800, width= 1000) %>% 
  layout(hoverlabel = list(bgcolor = "white"))

Maps (mean score over forecast dates and aheads)

Note that the results are scaled by population.

library(sf)
maps <- results %>%
  group_by(geo_value, forecaster) %>%
  summarise(across(wis:cov_80, mean)) %>%
  left_join(animalia::state_population, by = "geo_value") %>%
  mutate(across(wis:cov_80, ~ .x / population * 1e5)) %>%
  pivot_longer(wis:cov_80, names_to = "score") %>%
  group_by(score) %>%
  mutate(time_value = Sys.Date(),
         r = max(value)) %>%
  group_by(forecaster, .add = TRUE) %>%
  group_split()
# for county prediction, set geo_type = "county"
maps <- purrr::map(maps, 
                   ~as.covidcast_signal(
                     .x, signal = .x$score[1], 
                     data_source = .x$forecaster[1], 
                     geo_type = "state"))

maps <- purrr::map(maps,
                   ~plot(.x, 
                         choro_col = scales::viridis_pal()(3),
                         range = c(0,.x$r[1])))

nfcasts <- length(unique(results$forecaster))

Mean AE

# original code
cowplot::plot_grid(plotlist = maps[1:nfcasts], ncol = 3)

Mean Coverage_80

cowplot::plot_grid(plotlist = maps[(nfcasts+1):(nfcasts*2)], ncol = 3)

Mean WIS

cowplot::plot_grid(plotlist = maps[((nfcasts*2)+1):length(maps)], ncol = 3)

Trajectory plots

Work In Progress

options(timeout=200)
url4 <- "https://forecast-eval.s3.us-east-2.amazonaws.com/predictions_cards.rds"
download.file(url4, "predictions.RDS") # download to disk
state_predictions <- readRDS(paste0(here(), "/predictions.RDS"))


# Check how many aheads in the state_predictions data
# Check the forecast_end_date column

Trajectory plots

For county predictions, you might want to change the fig.height and fig.width chunk options here (in other notebooks we use fig.height = 120 and fig.width = 30).

# for county prediction, set geo_type = "county"
state.label = c(state.name, "Washington D.C.", "Porto Rico")
names(state.label) = c(tolower(state.abb), "dc", "pr")

# trajectory plots for selected forecaster
pd <- evalcast:::setup_plot_trajectory(
  state_predictions %>% filter(forecaster=="CMU-TimeSeries" & forecast_date %in% forecast_dates),
  intervals = 0.8,
  geo_type = "state",
  start_day = min(forecast_dates) - 60)

g <- ggplot(pd$truth_df, mapping = aes(x = target_end_date))
# build the fan
g <- g + geom_ribbon(
  data = pd$quantiles_df %>%
    mutate(upper = round(upper, 2),
           lower = round(lower, 2)),
  mapping = aes(ymin = lower, 
                ymax = upper, 
                fill = forecaster,
                group = interaction(forecaster, forecast_date)),
  alpha = .1) +
  scale_fill_viridis_d(begin=.15, end=.85)

# line layer
g <- g +
  #geom_line(aes(y = .data$value.y), color = "#3182BD") + # corrected
  geom_line(aes(y = value), size = .5) + # reported
  geom_line(data = pd$points_df,
            mapping = aes(y = value,
                          color = forecaster, 
                          group = interaction(forecaster, forecast_date)), 
            size = .5) +
  geom_point(aes(y = value), size = 1) + # reported gets dots
  geom_point(data = pd$points_df %>%
               mutate(value = round(value, 2)),
             mapping = aes(y = value, color = forecaster),
             size = 1) +
  scale_color_viridis_d(begin=.15, end=.85)
states <- g  +
  facet_wrap(~geo_value, 
             scales = "free_y", 
             ncol = 3, 
             labeller = labeller(geo_value = state.label)) +
  labs(x = "", y = "") +
  theme_bw() + 
  theme(legend.position = "none", strip.background = element_rect(fill = "white"))

ggplotly(states, height=5000, width= 1000)